﻿// Copyright (c) zhenlei520 All rights reserved.
// Licensed under the MIT License. See License.txt in the project root for license information.
// 参考来源：https://sourceforge.net/projects/bloomfilternet/

using System;
using System.Collections;
using System.Collections.Generic;
using System.Security.Cryptography;
using System.Text;

namespace EInfrastructure.Core.Tools.Common.BloomFilter
{
    /// <summary>
    /// Implementation of a Bloom-filter, as described here:
    /// http://en.wikipedia.org/wiki/Bloom_filter
    ///
    /// Translated from Java to C# 4.0. Original Java Source (by Magnus Skjegstad) : http://github.com/magnuss/java-bloomfilter
    /// <remarks>
    ///		author Magnus Skjegstad (magnus@skjegstad.com)
    ///		translated into .NET 4.0 by Jeckyhl (@sourceforge)
    /// </remarks>
    /// </summary>
    public class BloomFilter
    {
        private BitArray _bitset;
        private int _bitSetSize;
        private double _bitsPerElement;
        private int _expectedNumberOfFilterElements; // expected (maximum) number of elements to be added
        private int _numberOfAddedElements; // number of elements actually added to the Bloom filter
        private int _k; // number of hash functions

        static readonly Encoding Charset = Encoding.UTF8; // encoding used for storing hash values as strings

        /// <summary>
        /// The digest method is reused between instances
        /// </summary>
        /// <remarks>MD5 gives good enough accuracy in most circumstances. Change to SHA1 if it's needed</remarks>
        static readonly KeyedHashAlgorithm DigestFunction = new HMACMD5();

        /// <summary>
        /// Constructs an empty Bloom filter. The total length of the Bloom filter will be
        /// c*n.
        /// </summary>
        /// <param name="c">is the number of bits used per element.</param>
        /// <param name="n">is the expected number of elements the filter will contain.</param>
        /// <param name="k">is the number of hash functions used.</param>
        public BloomFilter(double c, int n, int k)
        {
            this._expectedNumberOfFilterElements = n;
            this._k = k;
            this._bitsPerElement = c;
            this._bitSetSize = (int) Math.Ceiling(c * n);
            _numberOfAddedElements = 0;
            this._bitset = new BitArray(_bitSetSize);
        }

        /// <summary>
        /// Constructs an empty Bloom filter. The optimal number of hash functions (k) is estimated from the total size of the Bloom
        /// and the number of expected elements.
        /// </summary>
        /// <param name="bitSetSize">defines how many bits should be used in total for the filter.</param>
        /// <param name="expectedNumberOElements">defines the maximum number of elements the filter is expected to contain.</param>
        public BloomFilter(int bitSetSize, int expectedNumberOElements)
            : this(
                bitSetSize / (double) expectedNumberOElements,
                expectedNumberOElements,
                (int) Math.Round((bitSetSize / (double) expectedNumberOElements) * Math.Log(2.0))
            )
        {
        }

        /// <summary>
        /// Constructs an empty Bloom filter with a given false positive probability. The number of bits per
        /// element and the number of hash functions is estimated
        /// to match the false positive probability.
        /// </summary>
        /// <param name="falsePositiveProbability">is the desired false positive probability.</param>
        /// <param name="expectedNumberOfElements">is the expected number of elements in the Bloom filter.</param>
        public BloomFilter(double falsePositiveProbability, int expectedNumberOfElements)
            : this(
                Math.Ceiling(-(Math.Log(falsePositiveProbability) / Math.Log(2))) / Math.Log(2), // c = k / ln(2)
                expectedNumberOfElements,
                (int) Math.Ceiling(-(Math.Log(falsePositiveProbability) / Math.Log(2))) // k = ceil(-log_2(false prob.))
            )
        {
        }

        /// <summary>
        /// Construct a new Bloom filter based on existing Bloom filter data.
        /// </summary>
        /// <param name="bitSetSize">defines how many bits should be used for the filter.</param>
        /// <param name="expectedNumberOfFilterElements">defines the maximum number of elements the filter is expected to contain.</param>
        /// <param name="actualNumberOfFilterElements">specifies how many elements have been inserted into the <code>filterData</code> BitArray.</param>
        /// <param name="filterData">a BitArray representing an existing Bloom filter.</param>
        public BloomFilter(int bitSetSize, int expectedNumberOfFilterElements, int actualNumberOfFilterElements,
            BitArray filterData)
            : this(bitSetSize, expectedNumberOfFilterElements)
        {
            this._bitset = filterData;
            this._numberOfAddedElements = actualNumberOfFilterElements;
        }

        /// <summary>
        /// Generates a digest based on the contents of a string.
        /// </summary>
        /// <param name="val">specifies the input data.</param>
        /// <param name="charset">specifies the encoding of the input data.</param>
        /// <returns>digest as long.</returns>
        public static int CreateHash(string val, Encoding charset)
        {
            return CreateHash(charset.GetBytes(val));
        }

        /// <summary>
        /// Generates a digest based on the contents of a string.
        /// </summary>
        /// <param name="val">specifies the input data. The encoding is expected to be UTF-8.</param>
        /// <returns>digest as long.</returns>
        public static int CreateHash(string val)
        {
            return CreateHash(val, Charset);
        }

        /// <summary>
        /// Generates a digest based on the contents of an array of bytes.
        /// </summary>
        /// <param name="data">specifies input data.</param>
        /// <returns>digest as long.</returns>
        public static int CreateHash(byte[] data)
        {
            return CreateHashes(data, 1)[0];
        }

        /// <summary>
        /// Generates digests based on the contents of an array of bytes and splits the result into 4-byte int's and store them in an array. The
        /// digest function is called until the required number of int's are produced. For each call to digest a salt
        /// is prepended to the data. The salt is increased by 1 for each call.
        /// </summary>
        /// <param name="data">specifies input data</param>
        /// <param name="hashes">number of hashes/int's to produce</param>
        /// <returns>array of int-sized hashes</returns>
        public static int[] CreateHashes(byte[] data, int hashes)
        {
            int[] result = new int[hashes];

            int k = 0;
            byte[] salt = new byte[1];
            while (k < hashes)
            {
                byte[] digest;
                lock (DigestFunction)
                {
                    DigestFunction.Key = salt;
                    salt[0]++;
                    digest = DigestFunction.ComputeHash(data);
                }

                for (int i = 0; i < digest.Length / 4 && k < hashes; i++)
                {
                    int h = 0;
                    for (int j = (i * 4); j < (i * 4) + 4; j++)
                    {
                        h <<= 8;
                        h |= ((int) digest[j]) & 0xFF;
                    }

                    result[k] = h;
                    k++;
                }
            }

            return result;
        }

        /// <summary>
        /// Compares the contents of two instances to see if they are equal.
        /// </summary>
        /// <param name="obj">is the object to compare to.</param>
        /// <returns>True if the contents of the objects are equal.</returns>
        public override bool Equals(object obj)
        {
            if (obj == null)
            {
                return false;
            }

            if (!this.GetType().Equals(obj.GetType()))
            {
                return false;
            }

            BloomFilter other = (BloomFilter) obj;
            if (this._expectedNumberOfFilterElements != other._expectedNumberOfFilterElements)
            {
                return false;
            }

            if (this._k != other._k)
            {
                return false;
            }

            if (this._bitSetSize != other._bitSetSize)
            {
                return false;
            }

            if (this._bitset != other._bitset &&
                (this._bitset == null || !ObjectUtils.Equals(this._bitset, other._bitset)))
            {
                return false;
            }

            return true;
        }

        /// <summary>
        /// Calculates a hash code for this class.
        /// <remarks>performance concerns : note that we read all the bits of bitset to compute the hash</remarks>
        /// <returns>hash code representing the contents of an instance of this class.</returns>
        /// </summary>
        public override int GetHashCode()
        {
            int hash = 7;
            hash = 61 * hash + (this._bitset != null ? ObjectUtils.HashBytes(this._bitset) : 0);
            hash = 61 * hash + this._expectedNumberOfFilterElements;
            hash = 61 * hash + this._bitSetSize;
            hash = 61 * hash + this._k;
            return hash;
        }

        /// <summary>
        /// Calculates the expected probability of false positives based on
        /// the number of expected filter elements and the size of the Bloom filter.
        /// <br /><br />
        /// The value returned by this method is the <i>expected</i> rate of false
        /// positives, assuming the number of inserted elements equals the number of
        /// expected elements. If the number of elements in the Bloom filter is less
        /// than the expected value, the true probability of false positives will be lower.
        /// </summary>
        /// <returns>expected probability of false positives.</returns>
        public double ExpectedFalsePositiveProbability()
        {
            return GetFalsePositiveProbability(_expectedNumberOfFilterElements);
        }

        /// <summary>
        /// Calculate the probability of a false positive given the specified
        /// number of inserted elements.
        /// </summary>
        /// <param name="numberOfElements">number of inserted elements.</param>
        /// <returns>probability of a false positive.</returns>
        public double GetFalsePositiveProbability(double numberOfElements)
        {
            // (1 - e^(-k * n / m)) ^ k
            return Math.Pow((1 - Math.Exp(-_k * (double) numberOfElements
                                          / (double) _bitSetSize)), _k);
        }

        /// <summary>
        /// Get the current probability of a false positive. The probability is calculated from
        /// the size of the Bloom filter and the current number of elements added to it.
        /// </summary>
        /// <returns>probability of false positives.</returns>
        public double GetFalsePositiveProbability()
        {
            return GetFalsePositiveProbability(_numberOfAddedElements);
        }


        /// <summary>
        /// Returns the value chosen for K.<br />
        /// <br />
        /// K is the optimal number of hash functions based on the size
        /// of the Bloom filter and the expected number of inserted elements.
        /// </summary>
        /// <returns>optimal k.</returns>
        public int GetK()
        {
            return _k;
        }

        /// <summary>
        /// Sets all bits to false in the Bloom filter.
        /// </summary>
        public void Clear()
        {
            _bitset.SetAll(false);
            _numberOfAddedElements = 0;
        }

        /// <summary>
        /// Adds an object to the Bloom filter. The output from the object's
        /// ToString() method is used as input to the hash functions.
        /// </summary>
        /// <param name="element">is an element to register in the Bloom filter.</param>
        public void Add(object element)
        {
            Add(Charset.GetBytes(element.ToString()));
        }

        /// <summary>
        /// Adds an array of bytes to the Bloom filter.
        /// </summary>
        /// <param name="bytes">array of bytes to add to the Bloom filter.</param>
        public void Add(byte[] bytes)
        {
            int[] hashes = CreateHashes(bytes, _k);
            foreach (int hash in hashes)
            {
                _bitset.Set(MathUtils.Abs(hash % _bitSetSize), true);
            }

            _numberOfAddedElements++;
        }

        /// <summary>
        /// Adds all elements from a Collection to the Bloom filter.
        /// </summary>
        /// <param name="c">Collection of elements.</param>
        public void AddAll(IEnumerable<object> c)
        {
            foreach (object element in c)
            {
                Add(element);
            }
        }

        /// <summary>
        /// Adds all elements from a Collection to the Bloom filter.
        /// </summary>
        /// <param name="c">Collection of elements.</param>
        public void AddAll(IEnumerable<byte[]> c)
        {
            foreach (byte[] byteArray in c)
            {
                Add(byteArray);
            }
        }

        /// <summary>
        /// Returns true if the element could have been inserted into the Bloom filter.
        /// Use getFalsePositiveProbability() to calculate the probability of this
        /// being correct.
        /// </summary>
        /// <param name="element">element to check.</param>
        /// <returns>true if the element could have been inserted into the Bloom filter.</returns>
        public bool Contains(object element)
        {
            return Contains(Charset.GetBytes(element.ToString()));
        }

        /// <summary>
        /// Returns true if the array of bytes could have been inserted into the Bloom filter.
        /// Use getFalsePositiveProbability() to calculate the probability of this
        /// being correct.
        /// </summary>
        /// <param name="bytes">array of bytes to check.</param>
        /// <returns>true if the array could have been inserted into the Bloom filter.</returns>
        public bool Contains(byte[] bytes)
        {
            int[] hashes = CreateHashes(bytes, _k);
            foreach (int hash in hashes)
            {
                if (!_bitset.Get(MathUtils.Abs(hash % _bitSetSize)))
                {
                    return false;
                }
            }

            return true;
        }

        /// <summary>
        /// Returns true if all the elements of a Collection could have been inserted
        /// into the Bloom filter. Use getFalsePositiveProbability() to calculate the
        /// probability of this being correct.
        /// </summary>
        /// <param name="c">elements to check.</param>
        /// <returns>true if all the elements in c could have been inserted into the Bloom filter.</returns>
        public bool ContainsAll(IEnumerable<object> c)
        {
            foreach (object element in c)
            {
                if (!Contains(element)) return false;
            }

            return true;
        }

        /// <summary>
        /// Read a single bit from the Bloom filter.
        /// </summary>
        /// <param name="bit">the bit to read.</param>
        /// <returns>true if the bit is set, false if it is not.</returns>
        public bool GetBit(int bit)
        {
            return _bitset.Get(bit);
        }

        /// <summary>
        /// Set a single bit in the Bloom filter.
        /// </summary>
        /// <param name="bit">is the bit to set.</param>
        /// <param name="value">If true, the bit is set. If false, the bit is cleared.</param>
        public void SetBit(int bit, bool value)
        {
            _bitset.Set(bit, value);
        }

        /// <summary>
        /// Return the bit set used to store the Bloom filter.
        /// </summary>
        /// <returns>bit set representing the Bloom filter.</returns>
        public BitArray GetBitSet()
        {
            return _bitset;
        }

        /// <summary>
        /// Returns the number of bits in the Bloom filter. Use count() to retrieve
        /// the number of inserted elements.
        /// </summary>
        /// <returns>the size of the bitset used by the Bloom filter.</returns>
        public int Size()
        {
            return this._bitSetSize;
        }

        /// <summary>
        /// Returns the number of elements added to the Bloom filter after it
        /// was constructed or after clear() was called.
        /// </summary>
        /// <returns>number of elements added to the Bloom filter.</returns>
        public int Count()
        {
            return this._numberOfAddedElements;
        }

        /// <summary>
        /// Returns the expected number of elements to be inserted into the filter.
        /// This value is the same value as the one passed to the constructor.
        /// </summary>
        /// <returns>expected number of elements.</returns>
        public int GetExpectedNumberOfElements()
        {
            return _expectedNumberOfFilterElements;
        }

        /// <summary>
        /// Get expected number of bits per element when the Bloom filter is full. This value is set by the constructor
        /// when the Bloom filter is created. See also getBitsPerElement().
        /// </summary>
        /// <returns>expected number of bits per element.</returns>
        public double GetExpectedBitsPerElement()
        {
            return this._bitsPerElement;
        }

        /// <summary>
        /// Get actual number of bits per element based on the number of elements that have currently been inserted and the length
        /// of the Bloom filter. See also getExpectedBitsPerElement().
        /// </summary>
        /// <returns>number of bits per element.</returns>
        public double GetBitsPerElement()
        {
            return this._bitSetSize / (double) _numberOfAddedElements;
        }
    }
}
